packages <- c("CIMseq", "CIMseq.data", "tidyverse", "circlize", "printr")
purrr::walk(packages, library, character.only = TRUE)
rm(packages)
##DATA
load('../data/CIMseqData.rda')
load('../data/sObj.rda')
load('../../MGA.analysis_enge20/data/pal.rda')
#there are 2 cells that were classified as colon but sorted as SI. These have to
#be removed manually
c <- getData(cObjSng, "classification")
s <- names(c[c %in% c("8", "13")])
i <- which(colnames(getData(cObjSng, "counts")) %in% s)
cObjSng <- CIMseqSinglets(
getData(cObjSng, "counts")[, -i],
getData(cObjSng, "counts.ercc")[, -i],
getData(cObjSng, "dim.red")[-i, ],
getData(cObjSng, "classification")[-i]
)
p <- plotUnsupervisedClass(cObjSng, cObjMul, pal)
p
ggsave(
plot = p,
filename = '../figures/MGA.enge20SI.classes.pdf',
device = cairo_pdf,
height = 240,
width = 240,
units = "mm"
)
p <- plotUnsupervisedMarkers(
cObjSng, cObjMul,
c("Lgr5", "Muc2", "Ptprc", "Chga", "Alpi", "Lyz1", "Dclk1"),
pal = RColorBrewer::brewer.pal(8, "Set1")
)
p
ggsave(
plot = p,
filename = '../figures/MGA.enge20SI.markers.pdf',
device = cairo_pdf,
height = 240,
width = 240,
units = "mm"
)
p <- plotUnsupervisedMarkers(
cObjSng, cObjMul, c("Mki67"),
pal = RColorBrewer::brewer.pal(8, "Set1")
)
p
ggsave(
plot = p,
filename = '../figures/MGA.enge20SI.cellcycle.pdf',
device = cairo_pdf,
height = 240,
width = 240,
units = "mm"
)
adj <- adjustFractions(cObjSng, cObjMul, sObj)
table(apply(adj, 1, sum))
| 0 | 1 | 2 | 3 | 4 | 5 |
|---|---|---|---|---|---|
| 19 | 80 | 217 | 86 | 31 | 2 |
tibble(fractions = c(fractions)) %>%
ggplot() +
geom_histogram(aes(fractions), binwidth = 0.01) +
theme_bw()
Range of fractions picked after adjustment.
range(fractions[adj == 1])
## [1] 0.02485269 0.99998969
tibble(
nCellTypes = apply(adj, 1, sum),
cost = getData(sObj, "costs")
) %>%
ggplot() +
geom_boxplot(aes(nCellTypes, cost, group = nCellTypes)) +
scale_x_continuous(name = "Detected cell types", breaks = 0:max(apply(adj, 1, sum))) +
theme_bw()
tibble(
sample = rownames(getData(sObj, "fractions")),
cost = unname(getData(sObj, "costs"))
) %>%
inner_join(
select(estimateCells(cObjSng, cObjMul), sample, estimatedCellNumber),
by = "sample"
) %>%
mutate(estimatedCellNumber = round(estimatedCellNumber)) %>%
ggplot() +
geom_boxplot(aes(estimatedCellNumber, cost, group = estimatedCellNumber)) +
scale_x_continuous(
name = "ERCC estimated cell number",
breaks = 0:max(round(pull(estimateCells(cObjSng, cObjMul), estimatedCellNumber)))
) +
theme_bw()
ercc <- filter(estimateCells(cObjSng, cObjMul), sampleType == "Multiplet")
nConnections <- apply(adj, 1, sum)
nConnections <- nConnections[match(ercc$sample, names(nConnections))]
tibble(
detectedConnections = round(nConnections),
estimatedCellNumber = round(ercc$estimatedCellNumber)
) %>%
ggplot() +
geom_boxplot(aes(estimatedCellNumber, detectedConnections, group = estimatedCellNumber)) +
scale_x_continuous(
name = "ERCC estimated cell number",
breaks = 0:max(round(ercc$estimatedCellNumber))
) +
scale_y_continuous(
name = "Detected cell number",
breaks = 0:max(round(nConnections))
) +
theme_bw()
tibble(
sample = names(nConnections),
detectedConnections = nConnections
) %>%
inner_join(tibble(
sample = colnames(getData(cObjMul, "counts")),
total.counts = colSums(getData(cObjMul, "counts"))
), by = "sample") %>%
ggplot() +
geom_boxplot(aes(detectedConnections, total.counts, group = detectedConnections)) +
scale_x_continuous(
name = "Detected cell number",
breaks = 0:max(nConnections)
) +
scale_y_continuous(name = "Total counts") +
theme_bw()
tibble(
sample = names(nConnections),
detectedConnections = nConnections
) %>%
inner_join(tibble(
sample = colnames(getData(cObjMul, "counts")),
total.ercc = colSums(getData(cObjMul, "counts.ercc"))
), by = "sample") %>%
ggplot() +
geom_boxplot(aes(detectedConnections, total.ercc, group = detectedConnections)) +
scale_x_continuous(
name = "Detected cell number",
breaks = 0:max(nConnections)
) +
scale_y_continuous(name = "Total ERCC counts") +
theme_bw()
singlets <- c(table(getData(cObjSng, "classification")))
singlets <- singlets / sum(singlets)
deconv <- colSums(adjustFractions(cObjSng, cObjMul, sObj))
deconv <- deconv[match(names(singlets), names(deconv))]
deconv <- deconv / sum(deconv)
if(!identical(names(singlets), names(deconv))) stop("name mismatch")
p <- tibble(
class = names(singlets),
singlet.freq = singlets,
multiplet.freq = deconv
) %>%
ggplot() +
geom_point(aes(singlet.freq, multiplet.freq, colour = class), size = 3) +
scale_colour_manual(values = pal[order(names(pal))]) +
xlim(min(c(deconv, singlets)), max(c(deconv, singlets))) +
ylim(min(c(deconv, singlets)), max(c(deconv, singlets))) +
geom_abline(slope = 1, intercept = 0, lty = 3, colour = "grey") +
labs(x = "Singlet relative frequency", y = "Multiplet relative frequency") +
guides(colour = guide_legend(title = "Cell Type")) +
theme_bw()
p
ggsave(
plot = p,
filename = '../figures/MGA.enge20SI.sngMulRelFreq.pdf',
device = cairo_pdf,
height = 180,
width = 180,
units = "mm"
)
plotSwarmCircos(
sObj, cObjSng, cObjMul, classOrder = cOrder, classColour = pal[cOrder],
h.ratio = 0.85
)
## Joining, by = "class"
Only detected duplicates, triplicates, and quadruplicates.
ERCC estimated cell number set to max 4.
Weight cutoff = 10.
# adj <- adjustFractions(cObjSng, cObjMul, sObj, binary = TRUE)
# samples <- rownames(adj)
# rs <- rowSums(adj)
# keep <- rs == 2 | rs == 3 | rs == 4
plotSwarmCircos(
sObj, cObjSng, cObjMul, weightCut = 10,
classOrder = cOrder, theoretical.max = 4, classColour = pal[cOrder],
h.ratio = 0.85, alpha = 1e-3
)
## Joining, by = "class"
pdf('../figures/MGA.enge20SI.circos.pdf', width = 9.5, height = 9.5)
plotSwarmCircos(
sObj, cObjSng, cObjMul, weightCut = 10,
classOrder = cOrder, theoretical.max = 4, classColour = pal[cOrder],
h.ratio = 0.85, alpha = 1e-3
)
## Joining, by = "class"
dev.off()
## quartz_off_screen
## 2